library("xts")
library("dplyr")
library("data.table")
library("Matrix")
library("reshape2")
library("ggplot2")


#dir = "~/Disk/Research/image_support_test/data_agg_nosize/"
#data_file_name = "AAPL_2019-01-02"

calc_aic <- function(residuals, param_count, model_config){
  p = support_max/delta
  d = length(residuals)
  size = length(residuals[[1]])
  cov = parallel::mclapply( 1:size, function(k){
    u_k = c()
    for (i in 1:d){
      u_k = c(u_k, residuals[[i]][k])
    }
    u_k = matrix(u_k, nrow = d )
    u_k %*% t(u_k)
  }, mc.cores = 32  )
  cov = Reduce("+", cov)
  cov = cov/size
  large_aic = log(det(cov)) + 2*param_count/size
  
  return (large_aic)
}




model_config_list <- c("state",                                 #1
                       "time_customizedtwo",                    #2
                       "state_time_customizedtwo",              #3
                       "hawkes",                                #4
                       "state_hawkes",                          #5
                       "hawkes_time_customizedtwo",             #6
                       "state_hawkes_time_customizedtwo",       #7
                       "hawkes_LASSO",                          #8
                       "state_hawkes_time_customizedtwo_LASSO", #9
                       "hawkes_LASSO_0005",                     #10
                       "state_hawkes_time_customizedtwo_LASSO_0005") #11    




data_files = c("AAPL_2019-01-02", "AAPL_2019-01-03","AAPL_2019-01-04","AAPL_2019-01-07","AAPL_2019-01-08",  
               "AAPL_2019-01-10", "AAPL_2019-01-11","AAPL_2019-01-14","AAPL_2019-01-15","AAPL_2019-01-16", 
               "AAPL_2019-01-17", "AAPL_2019-01-18","AAPL_2019-01-22","AAPL_2019-01-23","AAPL_2019-01-24", 
               "AAPL_2019-01-25", "AAPL_2019-01-28","AAPL_2019-01-29","AAPL_2019-01-30","AAPL_2019-01-31")

support_seq = c(20)
delta = 0.5
max_total_bins = (57600 - 34200)/delta - max(support_seq)/delta
selected_model_config = model_config_list[c(1,2,3,4,10,7,11)]
#config_size = ""
config_size = "_eventsize"

if(config_size == "_eventsize"){
  dir = "~/Disk/Research/image_support_test/data_agg_withsize/"
}else{
  dir = "~/Disk/Research/image_support_test/data_agg_nosize/"
}


############## ggplot over a range of supports #################

pdf_name = paste0("~/Downloads/AAPL_aic_support_range", "_delta", toString(delta), "_",config_size,".pdf")
pdf(pdf_name,width = 10,height = 10)
for (data_file_name in data_files[1]){
#for (data_file_name in data_files){
  print(data_file_name)
  aic_list = list()
  for (support_max in support_seq){
    print(support_max)
    for(model_config in selected_model_config){
      print(model_config)
      if (config_size == "_eventsize"){
        residuals = readRDS(  paste0(dir, "hawkes_residuals", config_size,"_",data_file_name, "_support", toString(support_max), "delta", toString(delta), "_",model_config,".rds") )
      }else{
        residuals = readRDS(  paste0(dir, "hawkes_markov_residuals","_",data_file_name, "_support", toString(support_max), "delta", toString(delta), "_",model_config,".rds") )
      }
      hawkes = readRDS(  paste0(dir, "hawkes_markov_pref_price", config_size,"_",data_file_name, "_support", toString(support_max), "delta", toString(delta),"_",model_config,".rds") )
      est = hawkes$hawkes_kernel_est
      print(paste0("Number of params: ", toString(nnzero(est,na.counted = FALSE))))
      aic_list[[model_config]] = c(aic_list[[model_config]], calc_aic(residuals,est,model_config))
    }
  }
  print(aic_list)
  plot = ggplot(melt(aic_list), aes(x = rep(support_seq,length(selected_model_config)), y = value, color = L1 ) ) + geom_line()+labs(title =paste0(data_file_name," AICs for Delta = ", toString(delta),"_",config_size)) + labs(x = "Supports") + labs(y = "AIC")+labs(color = "Model Type")
  print(plot)
}
dev.off()

##### R basic plot for a fixed support nosize##############
dir = "~/Disk/Research/image_support_test/data_agg_nosize/"
data_file_name = "AAPL_2019-01-02"
config_size = ""
support_seq = c(20)
delta = 0.25
support_max = 20
labels = c("Model I: state only", "Model II: time only", "Model III: state+time", "model IV: Hawkes only",
           "Model V: Hawkes+LASSO", "Model VI: state+time+Hawkes", "Model VII: state+time+Hawkes+LASSO")
## save pdf ###
pdf(paste0("~/Downloads/model_config_plots_support", toString(support_max),"_delta",toString(delta),".pdf"), width = 15, height=10)
for (data_file_name in data_files){
  print(data_file_name)
  aic_list = list()
  param_count = list()
  for (support_max in support_seq){
    print(support_max)
    for(model_config in selected_model_config){
      print(model_config)
      if (config_size == "_eventsize"){
        residuals = readRDS(  paste0(dir, "hawkes_residuals", config_size,"_",data_file_name, "_support", toString(support_max), "delta", toString(delta), "_",model_config,".rds") )
      }else{
        residuals = readRDS(  paste0(dir, "hawkes_markov_residuals","_",data_file_name, "_support", toString(support_max), "delta", toString(delta), "_",model_config,".rds") )
      }
      hawkes = readRDS(  paste0(dir, "hawkes_markov_pref_price", config_size,"_",data_file_name, "_support", toString(support_max), "delta", toString(delta),"_",model_config,".rds") )
      est = hawkes$hawkes_kernel_est

      if (model_config == "time_customizedtwo"){
        param_count[[model_config]] = c(param_count[[model_config]], dim(est)[1]*dim(est)[2]  )
      }else{
        param_count[[model_config]] = c(param_count[[model_config]],  nnzero(est,na.counted = FALSE)  )
      }
      print(paste0("Number of params: ", toString(param_count[[model_config]])))
      aic_list[[model_config]] = c(aic_list[[model_config]], calc_aic(residuals,param_count[[model_config]],model_config))
    }
  }
print(aic_list)
print(param_count)
original.parameters<- par( no.readonly = TRUE )
par(mfrow=c(1,2))
par(xaxt="n")
par(mar=c(20.1,4.1,4.1,4.1))
plot(x = 1:length(selected_model_config), y=aic_list,xlab = "", ylab = "Vector AIC")
title(main = paste0("AICs for ",substr(data_file_name,1,4), " on ",substr(data_file_name,6,15)), line = 3, cex.main = 1)
title(main = "No Size Aggregation", line = 2, cex.main=  0.8)
title(main = paste0(" Maximum Support = ", toString(support_max), " seconds",", Bin-size Delta = ", toString(delta)," seconds"),  line = 1, cex.main = 0.8)
text(1:length(selected_model_config), par("usr")[3] - 0.04, labels = labels, adj=1, cex = 1, xpd = TRUE,srt = 90)
points(which.min(aic_list),aic_list[which.min(aic_list)], col = "red")
legend("topright", legend=c("Minimum AIC"), col = c('red'),pch=c(1), cex=1)

plt = barplot(as.numeric(param_count),xlab = "", ylab = "Number of Effective Parameters", xaxt="n",ylim=c(0,1.2*max(as.numeric(param_count))))
title(main = paste0("# of Effective Parameters for ",substr(data_file_name,1,4), " on ",substr(data_file_name,6,15)), line = 3, cex.main = 1)
title(main = "No Size Aggregation", line = 2, cex.main=  0.8)
title(main = paste0(" Maximum Support = ", toString(support_max), " seconds",", Bin-size Delta = ", toString(delta)," seconds"),  line = 1, cex.main = 0.8)
text(plt, rep(-1000,7), labels = labels, adj=1, cex = 1, xpd = TRUE,srt = 90)
text(plt, as.numeric(param_count)+1000, labels = as.character(as.numeric(param_count)) )
}
dev.off()

##### R basic plot for a fixed support withsize##############  
dir = "~/Disk/Research/image_support_test/data_agg_withsize/"
data_file_name = "AAPL_2019-01-02"
config_size = "_eventsize"
support_seq = c(20)
delta = 0.25
support_max = 20
labels = c("Model I: state only", "Model II: time only", "Model III: state+time", "model IV: Hawkes only",
           "Model V: Hawkes+LASSO", "Model VI: state+time+Hawkes", "Model VII: state+time+Hawkes+LASSO")

pdf(paste0("~/Downloads/model_config_plots_eventsize_support", toString(support_max),"_delta",toString(delta),".pdf"),width = 15, height=10)

for (data_file_name in data_files){
  print(data_file_name)
  aic_list = list()
  param_count = list()
  for (support_max in support_seq){
    print(support_max)
    for(model_config in selected_model_config){
      print(model_config)
      if (config_size == "_eventsize"){
        residuals = readRDS(  paste0(dir, "hawkes_residuals", config_size,"_",data_file_name, "_support", toString(support_max), "delta", toString(delta), "_",model_config,".rds") )
      }else{
        residuals = readRDS(  paste0(dir, "hawkes_markov_residuals","_",data_file_name, "_support", toString(support_max), "delta", toString(delta), "_",model_config,".rds") )
      }
      hawkes = readRDS(  paste0(dir, "hawkes_markov_pref_price", config_size,"_",data_file_name, "_support", toString(support_max), "delta", toString(delta),"_",model_config,".rds") )
      est = hawkes$hawkes_kernel_est
      
      if (model_config == "time_customizedtwo"){
        param_count[[model_config]] = c(param_count[[model_config]], dim(est)[1]*dim(est)[2]  )
      }else{
        param_count[[model_config]] = c(param_count[[model_config]],  nnzero(est,na.counted = FALSE)  )
      }
      print(paste0("Number of params: ", toString(param_count[[model_config]])))
      aic_list[[model_config]] = c(aic_list[[model_config]], calc_aic(residuals,param_count[[model_config]],model_config))
    }
  }
  print(aic_list)
  print(param_count)
  original.parameters<- par( no.readonly = TRUE )
  par(mfrow=c(1,2))
  par(xaxt="n")
  par(mar=c(20.1,4.1,4.1,4.1))
  plot(x = 1:length(selected_model_config), y=aic_list,xlab = "", ylab = "Vector AIC")
  title(main = paste0("AICs for ",substr(data_file_name,1,4), " on ",substr(data_file_name,6,15)), line = 3, cex.main = 1)
  title(main = "With Size Aggregation", line = 2, cex.main=  0.8)
  title(main = paste0(" Maximum Support = ", toString(support_max), " seconds",", Bin-size Delta = ", toString(delta)," seconds"),  line = 1, cex.main = 0.8)
  text(1:length(selected_model_config), par("usr")[3] - 0.04, labels = labels, adj=1, cex = 1, xpd = TRUE,srt = 90)
  points(which.min(aic_list),aic_list[which.min(aic_list)], col = "red")
  legend("topright", legend=c("Minimum AIC"), col = c('red'),pch=c(1), cex=1)
  
  plt = barplot(as.numeric(param_count),xlab = "", ylab = "Number of Effective Parameters", xaxt="n",ylim=c(0,1.2*max(as.numeric(param_count))))
  title(main = paste0("# of Effective Parameters for ",substr(data_file_name,1,4), " on ",substr(data_file_name,6,15)), line = 3, cex.main = 1)
  title(main = "With Size Aggregation", line = 2, cex.main=  0.8)
  title(main = paste0(" Maximum Support = ", toString(support_max), " seconds",", Bin-size Delta = ", toString(delta)," seconds"),  line = 1, cex.main = 0.8)
  text(plt, rep(-1000,7), labels = labels, adj=1, cex = 1, xpd = TRUE,srt = 90)
  text(plt, as.numeric(param_count)+1000, labels = as.character(as.numeric(param_count)) )
}
dev.off()

###########Generate plots ##################
aic_matrix_nosize = readRDS("~/Downloads/model_config_aic_matrix.rds")
aic_matrix_withsize = readRDS("~/Downloads/model_config_aic_matrix_withsize.rds")

pdf("~/Downloads/model_config_plots_with_nosize.pdf", width = 2*5, height = 5*dim(aic_matrix_nosize)[1])
par(mfrow=c(dim(aic_matrix_nosize)[1],2))
for (date_file_name in rownames(aic_matrix_nosize)){
aic_list_nosize = aic_matrix_nosize[date_file_name,]
aic_list_withsize = aic_matrix_withsize[date_file_name,]

plot(x = 1:8, y=aic_list_nosize,xlab = "Index", ylab = "Vector AIC", main = paste0(date_file_name, "_nosize"))
text(x = 1:8, y = aic_list_nosize,labels =  model_config_list, cex = 0.8, font = 2, pos = c(4,3,3,3,3,2,1,2))
points(which.min(aic_list_nosize), aic_list_nosize[which.min(aic_list_nosize)], col = "red")

plot(x = 1:8, y=aic_list_withsize,xlab = "Index", ylab = "Vector AIC",  main = paste0(date_file_name, "_withsize"))
text(x = 1:8, y = aic_list_withsize,labels =  model_config_list, cex = 0.8, font = 2, pos = c(4,3,3,3,3,2,1,2))
points(which.min(aic_list_withsize), aic_list_withsize[which.min(aic_list_withsize)], col = "red")
}

dev.off()


aic_matrix = readRDS("~/Downloads/model_config_aic_matrix.rds")
aic_list = aic_matrix[1,]

par(mar = c(15,2,2,2))
original.parameters<- par( no.readonly = TRUE )
par(xaxt="n")
plot(x = 1:11, y=aic_list,xlab = "", ylab = "Vector AIC", main = data_file_name)
axis(1, at=seq(1, 11, by=1), labels = FALSE)
text(1:11, par("usr")[3]-0.05, labels = model_config_list, adj=1, cex = 0.8, xpd = TRUE,srt = 90)
points(which.min(aic_list),aic_list[which.min(aic_list)], col = "red")


########## plot over a range of supports nosize #############

dir = "~/Disk/Research/image_support_test/data_agg_nosize/"
#data_file_name = "AAPL_2019-01-04"
model_config = "state_hawkes_time_customizedtwo"
support_list = seq(1,20,1)
delta = 0.5

data_files = c("AAPL_2019-01-02", "AAPL_2019-01-03","AAPL_2019-01-04","AAPL_2019-01-07","AAPL_2019-01-08",  
             "AAPL_2019-01-10", "AAPL_2019-01-11","AAPL_2019-01-14","AAPL_2019-01-15","AAPL_2019-01-16", 
             "AAPL_2019-01-17", "AAPL_2019-01-18","AAPL_2019-01-22","AAPL_2019-01-23","AAPL_2019-01-24", 
             "AAPL_2019-01-25", "AAPL_2019-01-28","AAPL_2019-01-29","AAPL_2019-01-30","AAPL_2019-01-31")
longer_date_files = c("AAPL_2019-01-03", "AAPL_2019-01-07","AAPL_2019-01-14","AAPL_2019-01-16",
                    "AAPL_2019-01-17","AAPL_2019-01-18","AAPL_2019-01-24","AAPL_2019-01-28")

pdf("~/Disk/Research/image_support_test/AAPL_support1to20_delta0.5.pdf", width = 2*5, height = 5*5)
par(mfrow=c(5,2))
for (data_file_name in data_files){
aic_list = c()
print (data_file_name)
if(data_file_name %in% longer_date_files){
  support_list = seq(1,40,1)
}else{
  support_list = seq(1,20,1)
}

for (support_max in support_list){
  print(support_max)
  residuals = readRDS(  paste0(dir, "hawkes_markov_residuals_", data_file_name, "_support", toString(support_max), "delta", toString(delta), "_",model_config,".rds") )
  hawkes = readRDS(  paste0(dir, "hawkes_markov_pref_price_", data_file_name, "_support", toString(support_max), "delta", toString(delta),"_",model_config,".rds") )
  est = hawkes$hawkes_kernel_est
  est_state = est[rownames(est) %like% "state",]
  est_price = est[rownames(est) %like% "price",]
  est_time = est[rownames(est) %like% "time",]
  if (model_config == "state_price_hawkes" || model_config == "state_price" || model_config == "state_hawkes_time5min"|| model_config == "state_hawkes_time_customizedone"||model_config == "state_hawkes_time_customizedtwo"){
    add_param = dim(est_price)[1] + dim(est_state)[1] + dim(est_time)[1] -1
  }else if( model_config == "hawkes"){
    add_param = dim(est_price)[1] + dim(est_state)[1] + dim(est_time)[1] + 1
  } else if (model_config == "state_hawkes" || model_config == "price_hawkes"){
    add_param = dim(est_price)[1] + dim(est_state)[1] + dim(est_time)[1]
  } else{
    add_param = dim(est_price)[1] + dim(est_state)[1] + dim(est_time)[1] - 2
  }
  print (paste0(toString(add_param), " added parameters."))
  aic_list = c(aic_list,calc_large_aic(residuals, add_param, model_config) )
}


#  png(paste0("~/Disk/Research/image_support_test/mini_aic_", data_file_name,"_delta" ,toString(delta),"_support1to20.png" ))
plot( support_list, aic_list, pch = 20, col= "blue", cex = 0.5, main =paste0(data_file_name," AICs for Delta = ", toString(delta)), 
      xlab ="Support", ylab = "Vector AIC")
abline( v = support_list[which.min(aic_list)], col = "red")
#  dev.off()

}
dev.off()




aic_list_025 = readRDS("~/Desktop/Research/image_support_test/delta0.25_aic.rds")

par(mfrow=c(1,2))
plot( seq(1,20,0.5), aic_list, pch = 20, col= "blue", cex = 0.5, main = "AIC for delta = 0.5s", 
    xlab ="Support", ylab = "Vector AIC")
abline( v = seq(1,20,0.5)[which.min(aic_list)], col = "red")


plot( seq(1,20,0.25), aic_list_025, pch = 20, col= "blue", cex = 0.5, main = "AIC for delta = 0.25s", 
    xlab ="support", ylab = "Vector AIC")
abline( v = seq(1,20,0.25)[which.min(aic_list_025)], col = "red")


########## plot over a range of supports withsize #############

dir = "~/Disk/Research/image_support_test/data_agg_withsize/"
data_file_name = "AAPL_2019-01-02"
#model_config = "state_hawkes_time_customizedtwo"
model_config = "hawkes"
support_list = seq(1,20,1)
delta = 0.5

data_files = c("AAPL_2019-01-02", "AAPL_2019-01-03","AAPL_2019-01-04","AAPL_2019-01-07","AAPL_2019-01-08",  
               "AAPL_2019-01-10", "AAPL_2019-01-11","AAPL_2019-01-14","AAPL_2019-01-15","AAPL_2019-01-16", 
               "AAPL_2019-01-17", "AAPL_2019-01-18","AAPL_2019-01-22","AAPL_2019-01-23","AAPL_2019-01-24", 
               "AAPL_2019-01-25", "AAPL_2019-01-28","AAPL_2019-01-29","AAPL_2019-01-30","AAPL_2019-01-31")

pdf(paste0("~/Disk/Research/image_support_test/AAPL_support1to20_delta0.5_withsize_", model_config,".pdf"), width = 2*5, height = 5*5)
par(mfrow=c(5,2))
for (data_file_name in data_files){
  aic_list = c()
  print (data_file_name)
  
  for (support_max in support_list){
    print(support_max)
    residuals = readRDS(  paste0(dir, "hawkes_residuals_eventsize_", data_file_name, "_support", toString(support_max), "delta", toString(delta), "_",model_config,".rds") )
    hawkes = readRDS(  paste0(dir, "hawkes_markov_pref_price_eventsize_", data_file_name, "_support", toString(support_max), "delta", toString(delta),"_",model_config,".rds") )
    est = hawkes$hawkes_kernel_est
    est_state = est[rownames(est) %like% "state",]
    est_time = est[rownames(est) %like% "time",]
    if ( model_config == "state_time_customizedtwo" || model_config == "state_hawkes_time5min"||model_config == "state_hawkes_time1min"|| model_config == "state_hawkes_time_customizedone"||model_config == "state_hawkes_time_customizedtwo"||model_config == "state_hawkes_time_customizedthree"){
      add_param =  dim(est_state)[1] + dim(est_time)[1] -1
    }else if( model_config == "hawkes"){
      add_param =  dim(est_state)[1] + dim(est_time)[1] + 1
    } else if (model_config == "state" || model_config == "time_customizedtwo" || model_config == "state_hawkes" || model_config == "hawkes_time_customizedtwo"){
      add_param = dim(est_state)[1] + dim(est_time)[1]
    } else{
      add_param = dim(est_state)[1] + dim(est_time)[1] - 2
    }
    print (paste0(toString(add_param), " added parameters."))
    aic_list = c(aic_list,calc_large_aic(residuals, add_param, model_config) )
  }
  
  print(support_list[which.min(aic_list)])
  plot( support_list, aic_list, pch = 20, col= "blue", cex = 0.5, main =paste0(data_file_name," AICs for Delta = ", toString(delta)),
        xlab ="Support", ylab = "Vector AIC")
  abline( v = support_list[which.min(aic_list)], col = "red")

}
dev.off()




aic_list_025 = readRDS("~/Desktop/Research/image_support_test/delta0.25_aic.rds")

par(mfrow=c(1,2))
plot( seq(1,20,0.5), aic_list, pch = 20, col= "blue", cex = 0.5, main = "AIC for delta = 0.5s", 
    xlab ="Support", ylab = "Vector AIC")
abline( v = seq(1,20,0.5)[which.min(aic_list)], col = "red")


plot( seq(1,20,0.25), aic_list_025, pch = 20, col= "blue", cex = 0.5, main = "AIC for delta = 0.25s", 
    xlab ="support", ylab = "Vector AIC")
abline( v = seq(1,20,0.25)[which.min(aic_list_025)], col = "red")


########## plot over a range of delta #############
dir = "~/Disk/Research/image_support_test/data_agg_nosize/"
#data_file_name = "AAPL_2019-01-04"
model_config = "state_hawkes_time_customizedtwo"
support_max = 60
delta_list = c(0.5,0.25)

data_files = c("AAPL_2019-01-02", "AAPL_2019-01-03","AAPL_2019-01-04","AAPL_2019-01-07","AAPL_2019-01-08",  
             "AAPL_2019-01-10", "AAPL_2019-01-11","AAPL_2019-01-14","AAPL_2019-01-15","AAPL_2019-01-16", 
             "AAPL_2019-01-17", "AAPL_2019-01-18","AAPL_2019-01-22","AAPL_2019-01-23","AAPL_2019-01-24", 
             "AAPL_2019-01-25", "AAPL_2019-01-28","AAPL_2019-01-29","AAPL_2019-01-30","AAPL_2019-01-31")


pdf("~/Downloads/AAPL_range_of_delta.pdf")
for (data_file_name in data_files){
aic_list = c()
print (data_file_name)
for (delta in delta_list){
  print(delta)
  residuals = readRDS(  paste0(dir, "hawkes_markov_residuals_", data_file_name, "_support", toString(support_max), "delta", toString(delta), "_",model_config,".rds") )
  hawkes = readRDS(  paste0(dir, "hawkes_markov_pref_price_", data_file_name, "_support", toString(support_max), "delta", toString(delta),"_",model_config,".rds") )
  est = hawkes$hawkes_kernel_est
  est_state = est[rownames(est) %like% "state",]
  est_price = est[rownames(est) %like% "price",]
  est_time = est[rownames(est) %like% "time",]
  if (model_config == "state_price_hawkes" || model_config == "state_price" || model_config == "state_hawkes_time5min"|| model_config == "state_hawkes_time_customizedone"||model_config == "state_hawkes_time_customizedtwo"){
    add_param = dim(est_price)[1] + dim(est_state)[1] + dim(est_time)[1] -1
  }else if( model_config == "hawkes"){
    add_param = dim(est_price)[1] + dim(est_state)[1] + dim(est_time)[1] + 1
  } else if (model_config == "state_hawkes" || model_config == "price_hawkes"){
    add_param = dim(est_price)[1] + dim(est_state)[1] + dim(est_time)[1]
  } else{
    add_param = dim(est_price)[1] + dim(est_state)[1] + dim(est_time)[1] - 2
  }
  print (paste0(toString(add_param), " added parameters."))
  aic_list = c(aic_list,calc_large_aic(residuals, add_param, model_config) )
}
print (aic_list)
plot( delta_list, aic_list, pch = 20, col= "blue", cex = 0.5, main =paste0(data_file_name," AICs for support = ",toString(support_max)), 
      xlab ="Bin Size Delta", ylab = "Vector AIC")
abline( v = delta_list[which.min(aic_list)], col = "red")
#  dev.off()

}
dev.off()
